/*******************************************************************************
																			
	DESCRIPTION: 	This code takes predictions for individuals from year X 
					made using models trained on each year and stacks them to 
					prepare the data for the individual-level regressions of the 
					predictions on the unemployment rate, in order to investigate 
					the cyclicality of the job-finding rate (JFR).		

*******************************************************************************/

clear all

/***********************************************************************
* Part A: Analysis of predictions for individuals from 2006
*		trained on all years
************************************************************************/

* Pieces of the input file names used in section A1:
*   indiv       - cohort year, appears as "<indiv>Individuals"
*   model_indiv - tag placed directly before the cohort year
*   model       - tag placed at the very end of the file name
local indiv 2006
local model_indiv Full
local model Full

* Numeric prefix of the output file written at the end of Part A
global id_code 114

* Fix the random-number seed
set seed 2110
/***********************************************************************
* A1: Combining the data from all years - individuals are fixed
************************************************************************/

* Range of model years whose prediction files are stacked. Each file holds
* predictions for the same cohort (`indiv'), made with the model of one year.
local first_year 1992
local last_year  2016
local second_year = `first_year' + 1

* Start from the file of the first model year ...
use "${data}/003_MainWithEnsemblePred_`model_indiv'_`indiv'Individuals_TrainedOn`first_year'modelIndividuals_`model'.dta", clear

gen PredictionYear = `first_year' // This is the year of the model used to make predictions.

* ... and append the files of all remaining model years. Freshly appended rows
* have PredictionYear missing, which is how they are tagged with their year.
forvalues model_year = `second_year'/`last_year' {

	* Loading data with predictions from ensemble model
	append using "${data}/003_MainWithEnsemblePred_`model_indiv'_`indiv'Individuals_TrainedOn`model_year'modelIndividuals_`model'.dta"
	replace PredictionYear = `model_year' if missing(PredictionYear)

}

* From here on "year" is the model year; the individuals' own year is kept
* separately as IndivYear.
rename year IndivYear 
label var IndivYear "year when individuals became unemployed"
rename PredictionYear year 
label var year "year of the model used to make predictions"

* Keep only relevant vars
keep Lop* InLnr IndivYear year p_emplAft6M_0M_In

* Keep only indiv for whom the prediction is not missing.
* missing() also catches extended missing values (.a-.z), which "!= ." lets through.
keep if !missing(p_emplAft6M_0M_In)

/***********************************************************************
* A2: Merging with data on unemployment rates
************************************************************************/

* Attach shareUnempSS by model year. Only rows found in both datasets are
* retained, and no _merge indicator is left behind.
merge m:1 year using "${data}/101_Employed_Unemployed.dta", ///
	keepusing(shareUnempSS) keep(match) nogenerate
		
/***********************************************************************
* A3: Generating logs of variables and relative variables
************************************************************************/

* Reference year for the relative unemployment rate and for the time trend
local base_year 2006

* Create log of predicted job-finding rates		
gen log_p_emplAft6M_0M_In = log(p_emplAft6M_0M_In)

* Unemployment rate in the base year (mean over the base-year rows).
* Stop with an error if there is none: otherwise every relative variable
* below would silently come out missing.
quietly summarize shareUnempSS if year == `base_year', meanonly
if r(N) == 0 {
	display as error "no non-missing shareUnempSS for year `base_year'; cannot build the relative unemployment rate"
	exit 459
}
tempname base_rate
scalar `base_rate' = r(mean)

* Create relative unemployment rate (base = `base_year')
gen shareUnemp_rel = shareUnempSS / `base_rate'

* Create log of relative unemployment rate (base = `base_year')
gen log_shareUnemp_rel = log(shareUnemp_rel)

* Create a time trend variable (0 in the base year)
gen trend_rel = year - `base_year'

/***********************************************************************
* A4: Preparing panel dataset
************************************************************************/

* Create unique person-spell identifier: one integer per distinct
* (LopNr_PersonNr, InLnr) combination. The "missing" option gives rows with
* a missing identifier component a key as well, instead of a missing key.
* NOTE: keys are numbered in sorted order of the identifiers; they are
* arbitrary labels and only their equality across rows is meaningful.
egen long key = group(LopNr_PersonNr InLnr), missing

* Report the number of distinct IDs (123 469 unique IDs were noted previously)
quietly summarize key, meanonly
display as text "Unique person-spell IDs: " as result r(max)

* Save data for R:
save "${data}/${id_code}_RPanelRegression_Full_2006_relative_trend.dta", replace